% HYPOTHETICAL EXPERIMENT/SIMULATION
% N subjects are followed up for a period of length time and measured every dt years
% p1 the risk of developing disease when exposed
% p0 the risk of developing disease when not exposed
% e = probability of being exposed
% we assume that there is no loss to follow-up

%%%% Paper simulation scenario IV (MULTIVARIATE ANALYSIS): in scenario IV we assumed that a binary confounder C increased the probability of being 
%%%% exposed (by ce times) as well as the risk of disease (by cd times) in the absence of a causal association between E and D 

clear all

% ---- Cohort size and follow-up grid ----
N = 1000;     % number of simulated subjects
time = 10;    % length of follow-up
dt = 1;       % spacing between successive measurements

% ---- Exposure E and disease D ----
e = 0.1;      % baseline probability of becoming exposed to E (pe in the article)
RR = 1;       % relative risk of disease associated with exposure E
p0 = 0.01;    % disease risk when not exposed
p1 = RR*p0;   % disease risk when exposed
de = 1;       % factor by which having the disease multiplies the probability of exposure to E

% ---- Confounder C ----
c = 0.1;      % probability of becoming exposed to C
cd = 5;       % factor by which C multiplies the disease risk
              % (note: this variable shadows the built-in cd command for the rest of the script)
ce = 5;       % factor by which C multiplies the probability of exposure to E

% From here on, time is the number of columns of the state arrays
% (the follow-up length given above plus one).
time = time+1;
age_at_date = zeros(N,time); % biological age at each moment in time (only allocated here)

% subject is an N x time x 7 array. Layer contents:
%   1  probability of developing the disease (starts at p0)
%   2  exposure status E          (starts at 0)
%   3  disease status D           (starts at 0)
%   4  follow-up time             (starts at 0)
%   5  probability of exposure E  (starts at e)
%   6  probability of exposure C  (starts at c)
%   7  confounder status C        (starts at 0)
% The seven starting values are placed along the third dimension and
% replicated over all subjects and time points.
subject = repmat(reshape([p0 0 0 0 e c 0],1,1,7), N, time);

% Seed the random number generator so that repeated runs give the same results.
rng(0,'twister');

% ted=zeros(N,2); %ted is a matrix in which column 1 contains the time of first exposure and column 2 the time of disease onset
% ted(:,1)= 0; % Default mode is no exposure
% ted(:,2)= 0; % Default mode is no disease
% 
% ted2=zeros(N,2); %ted2 is a matrix in which column 1 contains the biological age at first exposure and column 2 the biological age at disease onset




% Simulate the cohort subject by subject. At every time step three events
% are tried in a fixed order: exposure to the confounder C, exposure to E,
% and onset of the disease D. For each event a waiting time is drawn from an
% exponential distribution with rate lambda; the event happens in this step
% when the waiting time is at most dt, i.e. with probability 1-exp(-lambda*dt).
%
% Every event changes the state from column t+1 onwards. Risk modifiers are
% applied MULTIPLICATIVELY to the values already stored in those columns, so
% that an effect set by an earlier event (or by another event in the same
% step) is kept. Previously the E event overwrote the disease risk with p1,
% which silently removed the cd-fold increase caused by C, and the D event
% could overwrite a ce-fold increase applied in the same step.
for s=1:N  % s is the subject number

  % Each subject can be exposed to C and to E only once and can get the
  % disease only once. The random draws below are still made after an event
  % has occurred, which keeps the random number stream aligned.
  counterC=0;counterE=0;counterD=0;

  for t=1:dt:time-1

  % The confounder C comes first because in simulation IV it influences
  % the probability of E and D but not vice versa.
  lambdac=subject(s,t,6); % The probability of getting exposed to C
  wtc = -log(rand (1, 1))/lambdac; % Exponentially distributed waiting time
  if (wtc<=dt && counterC==0)  % Exposure to C falls within this step
      subject(s,t+1:time,7)=1;  % exposed to C until the end of follow-up
      subject(s,t+1:time,1)=subject(s,t+1:time,1)*cd; % cd times higher risk of the disease D
      subject(s,t+1:time,5)=subject(s,t+1:time,5)*ce; % ce times higher probability of exposure to E
      counterC=1;
  end

  lambdae=subject(s,t,5); % The probability of getting exposed to E
  wte = -log(rand (1, 1))/lambdae; % Exponentially distributed waiting time
  if (wte<=dt && counterE==0)  % Exposure to E falls within this step
      subject(s,t+1:time,2)=1;  % exposed until the end of follow-up
      % Scale the current risk by RR (p0*RR equals p1 when C is absent)
      % instead of resetting it to p1, so the effect of C is preserved.
      subject(s,t+1:time,1)=subject(s,t+1:time,1)*RR;
      counterE=1;
  end

  lambdad=subject(s,t,1); % The probability of getting the disease D
  wtd = -log(rand (1, 1))/lambdad; % Exponentially distributed waiting time
  if (wtd<=dt && counterD==0)   % Disease onset falls within this step
      subject(s,t+1:time,3)=1; % diseased until the end of follow-up
      % de times higher probability of exposure, on top of any effect of C
      subject(s,t+1:time,5)=subject(s,t+1:time,5)*de;
      counterD=1;
  end

  % Layer 4 contains the 'follow-up time' of every individual. The last
  % column (index time) is not visited by this loop and keeps its initial value.
  subject(s,t,4)=t-1;
  end
end

% s = (1:N);

%plot(s,ted(:,1),'-',s, ted(:,2),'--');

%%%%%% Make the JIDT classes available to MATLAB/Octave.
% NOTE(review): the jar location is a machine-specific absolute path; it has
% to be adapted on any other computer.
javaaddpath('C:\Users\Ahmad Aziz\Desktop\Papers\Causal_inference\infodynamics-dist-1.2.1/infodynamics.jar');

%% Select a time window of the cohort and flatten it to column vectors

tw = time-1; % window length (number of steps after the first column)
tb = 1;      % index of the first column of the window
k = 10;      % passed as second constructor argument to the transfer entropy calculator below

windowCols = tb:tb+tw;

% For every variable: take the subjects-by-time slice, transpose it so that
% time runs down the columns, and stack the columns. The result is one long
% column in which the complete time course of subject 1 is followed by that
% of subject 2, and so on.
sourceArrayE2 = subject(:,windowCols,2).'; % exposure E
sourceArrayE2 = sourceArrayE2(:);

sourceArrayC2 = subject(:,windowCols,7).'; % confounder C
sourceArrayC2 = sourceArrayC2(:);

destArray = subject(:,windowCols,3).';     % disease D (destination)
destArray = destArray(:);

% Two-column source: column 1 is E, column 2 is C.
sourceArray = [sourceArrayE2, sourceArrayC2];


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Code from Example 5 - Multivariate transfer entropy on binary data 
%
% Computes the observed multivariate transfer entropy from the joint source
% (E,C) to the destination D.

% Constructor arguments: 4 and k. The two binary source columns are combined
% below into a single value, which needs four states
% (presumably the first argument is the alphabet size and the second the
% destination history length -- see the JIDT documentation).
teCalc=javaObject('infodynamics.measures.discrete.TransferEntropyCalculatorDiscrete', 4, k);
teCalc.initialise();
% We need to construct the joint values of the dest and source before we pass them in,
% and need to use the matrix conversion routine when calling from Matlab/Octave:
mUtils=javaObject('infodynamics.utils.MatrixUtils');
% The observations are added exactly once. A second addObservations call on
% the same (unconverted) arrays used to follow here; it fed every sample to
% the calculator twice and has been removed.
teCalc.addObservations(mUtils.computeCombinedValues(octaveToJavaIntMatrix(sourceArray), 2), ...
		mUtils.computeCombinedValues(octaveToJavaIntMatrix(destArray), 2));

% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% % Code from Example 1 - Transfer entropy on binary data
% 
% % global teCalc
% 
% teCalc=javaObject('infodynamics.measures.discrete.TransferEntropyCalculatorDiscrete', 2, tw);
% teCalc.initialise();
% % Since we have simple arrays of ints, we can directly pass these in:
% teCalc.addObservations(sourceArray, destArray);

% Calculation of the TE:
result = teCalc.computeAverageLocalOfObservations();
fprintf('The transfer entropy is %.4f bits.\n', result);

% cs = teCalc.computeSignificance(1000);
% meancs = getMeanOfDistribution(cs)
% sd = getStdOfDistribution(cs)
% %tscore = getTSscore(cs) %Assuming the distribution is Gaussian, return a t-score for our observed measurement
% pvalue = cs.pValue
%  % % t2=(result2-mean)/sd

%%% Perform permutation based resampling in order to build a null
%%% distribution for the multivariate TE, i.e. TE(E,C->D).
%%% NOTE(review): the original header spoke of the conditioned TE(E->D|C),
%%% but the code below shuffles and evaluates the joint source (E,C).
repeats= 100;

% Preallocate the null distribution (one value per permutation).
resultMV = zeros(1,repeats);

% The helper object and the combined destination values do not change
% between permutations, so they are created once.
mUtils=javaObject('infodynamics.utils.MatrixUtils');
destCombined = mUtils.computeCombinedValues(octaveToJavaIntMatrix(destArray), 2);

for i=1:repeats

    % Random shuffling of the ROWS of the sourceArray; each row has to be
    % regarded as a vector in this case (see Lizier 2010, J Comp Neurosci. p 5)
    sourceArray_shuffled = sourceArray(randperm(size(sourceArray,1)),:);

    % Calculate the multivariate TE (i.e. TE(E,C->D)) for the shuffled source.
    teCalc=javaObject('infodynamics.measures.discrete.TransferEntropyCalculatorDiscrete', 4, k);
    teCalc.initialise();
    % The joint source values are constructed with the matrix conversion
    % routine required when calling from Matlab/Octave. The observations are
    % added exactly once; a duplicate addObservations call on the unconverted
    % arrays has been removed.
    teCalc.addObservations(mUtils.computeCombinedValues(octaveToJavaIntMatrix(sourceArray_shuffled), 2), ...
            destCombined);

    % Calculation of TE(E,C->D):
    resultMV(i) = teCalc.computeAverageLocalOfObservations(); % resultMV equals to TE(E,C->D)

end

% Summarise the null distribution: its mean and its 95th percentile.
meanMV = mean(resultMV);
fprintf('The mean of the null-distribution for the multivariate transfer entropy (TE(E,C->D) is %.8f.\n', meanMV);
fprintf('The 95th percentile of this null-distribution for the multivariate transfer entropy (TE(E,C->D) is %.8f.\n', prctile(resultMV,95));

% Percentile rank of the observed TE within the null distribution.
% NOTE(review): invprctile is not defined in this file; presumably an
% external helper that must be on the path -- confirm.
observedPercentile = invprctile(resultMV,result,2);

% Fraction of the null distribution at or above the observed value.
pMV = (100-observedPercentile)/100  % pMV equals the probability of this value or higher 

% One-sample t-test of the null TE values against the observed TE.
[h,p,ci,stats] = ttest(resultMV,result)

% %%%% Bootrstrap cofidence intervals %%%%%%%%%%%%%%%%%% 
% global counter btci_dist
% counter = 0;
% h = @aziz_bootstrapci_bin_transferentropy;
% bci = bootci(100,{h,sourceArray', destArray'}, 'alpha', 0.05, 'type', 'bca')

%dist=cs.distribution;

% Conventional association measures on the flattened series, for comparison
% with the transfer entropy: column 1 of sourceArray is used as the predictor
% and column 2 as the variable that is controlled for.
diseaseSeries    = destArray(:,1);
exposureSeries   = sourceArray(:,1);
confounderSeries = sourceArray(:,2);

% Plain correlation between disease and exposure.
[correlation,pval1] = corrcoef(diseaseSeries,exposureSeries)
% Partial correlation between disease and exposure, controlling for the confounder.
[partialcorrelation,pval2]=partialcorri(diseaseSeries, exposureSeries,confounderSeries)

%rho=corr(sourceArray',destArray')
%Pearson_corr(s)=pr(1,2);

%%% Save data for performing a cross-tabulation and calculating the
%%% chi-square statistic: a cross-section of the cohort at the end of the follow-up period
ct = time; % index of the last column of the state array
exposure=subject(:,ct,2);   % exposure status E of every subject at ct
disease =subject(:,ct,3);   % disease status D of every subject at ct
confounder =subject(:,ct,7); % confounder status C of every subject at ct

% The cross-section computed above was never written to disk, although it
% is the data needed for the cross-tabulation. It is now stored alongside
% the flattened time series; the previously saved variables are unchanged.
save('aziz_transferentropy_simulation4_3.mat', 'sourceArrayC2', 'sourceArrayE2','destArray', ...
     'exposure', 'disease', 'confounder');


% hist(dist);
% [counts, bins] = hist(dist);
% plot(bins, counts); %# get a line plot of the histogram

% tev is a vector containing the individual transfer entropies